
*****************************************************************
** Data Prep Do file for 
** "Inferring Tax Compliance From Pass-Through: Evidence From Airbnb Tax Enforcement Agreements"
** By Andrew J. Bibler, Keith F. Teltser, and Mark J. Tremblay
*************************************************************************************
***cleaning up property file that contains counties and cities for later merging

clear
set more off

use combined_property_file_geocoded.dta, clear

gen nonuscoords = 0
replace nonuscoords = 1 if g_country!="United States of America" & !missing(g_lat)

replace city = g_city if missing(city)
replace state = g_state if missing(state)
replace zipcode = real(g_postcode) if zipcode==0

gen county_suffix = substr(g_county,-7,.)
gen countyrev = strreverse(g_county) if county_suffix==" County"
gen countyrev1 = substr(countyrev,8,.) if county_suffix==" County"
gen countyrev2 = strreverse(countyrev1) if county_suffix==" County"
replace countyname = countyrev2 if missing(countyname)
replace countyname = g_county if missing(countyname)

drop dup idindex county_suffix countyrev countyrev1 countyrev2

gen tax_city = city
replace tax_city = gis_city if city!=gis_city & !missing(gis_city)

*additional_reverse_geocodes.dta" not provided - must create yourself for any properties still missing location info.  See step 4 in location assignment readme.
merge 1:1 propertyid using "additional_reverse_geocodes.dta", gen(_mergegeocode) keepusing(d_locality d_city d_county d_state d_zipcode)

*to adjust d_county into proper format:
gen county_suffix = substr(d_county,-7,.)
gen countyrev = strreverse(d_county) if county_suffix==" County"
gen countyrev1 = substr(countyrev,8,.) if county_suffix==" County"
gen countyrev2 = strreverse(countyrev1) if county_suffix==" County"
replace fips = "." if countyrev2 != countyname & _mergegeocode==3 & missing(tax_city) & !missing(countyrev2)
replace countyfips = "." if countyrev2 != countyname & _mergegeocode==3 & missing(tax_city) & !missing(countyrev2)
replace countyname = countyrev2 if countyrev2 != countyname & _mergegeocode==3 & missing(tax_city) & !missing(countyrev2)
drop county_suffix countyrev countyrev1 countyrev2

*to fill in tax_city with geocoded info from d_city/d_locality
*first replace with d_city, if still missing then replace with d_locality
replace tax_city = d_city if _mergegeocode==3 & missing(tax_city)
replace tax_city = d_locality if _mergegeocode==3 & missing(tax_city)

*drop those still missing tax_city (4 total)
drop if missing(tax_city)

destring statefips, replace
destring countyfips, replace
destring fips, replace

*bandaids to fix location issues
replace countyname = "San Francisco" if countyname=="San Francisco City and"
replace tax_city="Rancho Santa Margarita" if tax_city=="Rancho Santa Margari"
replace state="Virginia" if city=="Arlington" & zipcode==22209

*batch of fixes (useful website: https://www.mapdevelopers.com/what-county-am-i-in.php)
replace statefips=11 if inlist(zipcode,20011,20012) & countyname=="Montgomery" & state=="District of Columbia"
replace countyfips=1 if inlist(zipcode,20011,20012) & countyname=="Montgomery" & state=="District of Columbia"
replace fips=11001 if inlist(zipcode,20011,20012) & countyname=="Montgomery" & state=="District of Columbia"
replace tax_city=city if inlist(zipcode,20011,20012) & countyname=="Montgomery" & state=="District of Columbia"
replace countyname="District of Columbia" if inlist(zipcode,20011,20012) & countyname=="Montgomery" & state=="District of Columbia"
replace state = "Maryland" if inlist(zipcode,20910,20815,20902,20912) & countyname=="Montgomery" & state=="District of Columbia"
replace state="District of Columbia" if inlist(zipcode,20032,20012,20015,20019) & countyname=="District of Columbia" & state=="Maryland"
replace statefips=24 if inlist(zipcode,20782,20815,20743) & countyname=="District of Columbia" & state=="Maryland"
replace tax_city=city if inlist(zipcode,20782,20815,20743) & countyname=="District of Columbia" & state=="Maryland"
replace countyfips=33 if inlist(zipcode,20782,20743) & countyname=="District of Columbia" & state=="Maryland"
replace fips=24033 if inlist(zipcode,20782,20743) & countyname=="District of Columbia" & state=="Maryland"
replace countyfips=31 if inlist(zipcode,20815) & countyname=="District of Columbia" & state=="Maryland"
replace fips=24031 if inlist(zipcode,20815) & countyname=="District of Columbia" & state=="Maryland"
replace countyname="Montgomery" if inlist(zipcode,20815) & countyname=="District of Columbia" & state=="Maryland"
replace countyname="Prince George's" if inlist(zipcode,20782,20743) & countyname=="District of Columbia" & state=="Maryland"
replace state = "New York" if inlist(zipcode,10019,10035,10014,10003) & countyname=="New York" & state=="New Jersey"
replace statefips = 34 if inlist(zipcode,7446,7047,7105,7087,7020,7302) & countyname=="New York" & state=="New Jersey"
replace tax_city = city if inlist(zipcode,7446,7047,7105,7087,7020,7302) & countyname=="New York" & state=="New Jersey"
replace countyfips = 3 if inlist(zipcode,7446,7020) & countyname=="New York" & state=="New Jersey"
replace fips = 34003 if inlist(zipcode,7446,7020) & countyname=="New York" & state=="New Jersey"
replace countyname = "Bergen" if inlist(zipcode,7446,7020) & countyname=="New York" & state=="New Jersey"
replace countyfips = 17 if inlist(zipcode,7302,7087,7047) & countyname=="New York" & state=="New Jersey"
replace fips = 34017 if inlist(zipcode,7302,7087,7047) & countyname=="New York" & state=="New Jersey"
replace countyname = "Hudson" if inlist(zipcode,7302,7087,7047) & countyname=="New York" & state=="New Jersey"
replace countyfips = 13 if inlist(zipcode,7105) & countyname=="New York" & state=="New Jersey"
replace fips = 34013 if inlist(zipcode,7105) & countyname=="New York" & state=="New Jersey"
replace countyname = "Essex" if inlist(zipcode,7105) & countyname=="New York" & state=="New Jersey"
replace state = "New Jersey" if inlist(zipcode,7030,7086) & countyname=="Hudson" & state=="New York"
replace statefips=36 if inlist(zipcode,10009,11101,10007) & countyname=="Hudson" & state=="New York"
replace tax_city = city if inlist(zipcode,10009,11101,10007) & countyname=="Hudson" & state=="New York"
replace countyfips=61 if inlist(zipcode,10009,10007) & countyname=="Hudson" & state=="New York"
replace fips=36061 if inlist(zipcode,10009,10007) & countyname=="Hudson" & state=="New York"
replace countyname="New York" if inlist(zipcode,10009,10007) & countyname=="Hudson" & state=="New York"
replace countyfips=81 if inlist(zipcode,11101) & countyname=="Hudson" & state=="New York"
replace fips=36081 if inlist(zipcode,11101) & countyname=="Hudson" & state=="New York"
replace countyname="Queens" if inlist(zipcode,11101) & countyname=="Hudson" & state=="New York"
replace countyfips = 510 if tax_city=="Alexandria" & countyname=="Arlington"
replace fips = 51510 if tax_city=="Alexandria" & countyname=="Arlington"
replace countyname = "Alexandria" if tax_city=="Alexandria" & countyname=="Arlington"
replace countyfips = 87 if tax_city=="Arabi" & countyname=="Orleans"
replace fips = 22087 if tax_city=="Arabi" & countyname=="Orleans"
replace countyname="St. Bernard" if tax_city=="Arabi" & countyname=="Orleans"
replace countyfips = 13 if tax_city=="Arlington" & countyname=="Alexandria"
replace fips = 51013 if tax_city=="Arlington" & countyname=="Alexandria"
replace countyname = "Arlington" if tax_city=="Arlington" & countyname=="Alexandria"
replace countyfips = 99 if tax_city=="Boca Raton" & countyname=="Broward"
replace fips = 12099 if tax_city=="Boca Raton" & countyname=="Broward"
replace countyname = "Palm Beach" if tax_city=="Boca Raton" & countyname=="Broward"
replace countyfips = 91 if tax_city=="Cheltenham" & countyname=="Philadelphia"
replace fips = 42091 if tax_city=="Cheltenham" & countyname=="Philadelphia"
replace countyname = "Montgomery" if tax_city=="Cheltenham" & countyname=="Philadelphia"
replace countyfips = 69 if tax_city=="Clermont" & countyname=="Polk"
replace fips = 12069 if tax_city=="Clermont" & countyname=="Polk"
replace countyname = "Lake" if tax_city=="Clermont" & countyname=="Polk"
replace countyfips = 103 if tax_city=="Cold Spring Harbor" & countyname=="Nassau"
replace fips = 36103 if tax_city=="Cold Spring Harbor" & countyname=="Nassau"
replace countyname = "Suffolk" if tax_city=="Cold Spring Harbor" & countyname=="Nassau"
replace countyfips = 81 if tax_city=="Daly City" & countyname=="San Francisco"
replace fips = 6081 if tax_city=="Daly City" & countyname=="San Francisco"
replace countyname = "San Mateo" if tax_city=="Daly City" & countyname=="San Francisco"
replace countyfips = 81 if tax_city=="East Palo Alto" & countyname=="Santa Clara"
replace fips = 6081 if tax_city=="East Palo Alto" & countyname=="Santa Clara"
replace countyname = "San Mateo" if tax_city=="East Palo Alto" & countyname=="Santa Clara"
replace countyfips = 13 if tax_city=="El Cerrito" & countyname=="Alameda"
replace fips = 6013 if tax_city=="El Cerrito" & countyname=="Alameda"
replace countyname = "Contra Costa" if tax_city=="El Cerrito" & countyname=="Alameda"
replace countyfips = 86 if tax_city=="Golden Beach" & countyname=="Broward"
replace fips = 12086 if tax_city=="Golden Beach" & countyname=="Broward"
replace countyname = "Miami-Dade" if tax_city=="Golden Beach" & countyname=="Broward"
replace countyfips = 59 if tax_city=="Huntington" & countyname=="Alexandria"
replace fips = 51059 if tax_city=="Huntington" & countyname=="Alexandria"
replace countyname = "Fairfax" if tax_city=="Huntington" & countyname=="Alexandria"
replace countyfips = 13 if tax_city=="Irvington" & countyname=="Union"
replace fips = 34013 if tax_city=="Irvington" & countyname=="Union"
replace countyname = "Essex" if tax_city=="Irvington" & countyname=="Union"
replace countyfips = 37 if tax_city=="La Mirada" & countyname=="Orange"
replace fips = 6037 if tax_city=="La Mirada" & countyname=="Orange"
replace countyname = "Los Angeles" if tax_city=="La Mirada" & countyname=="Orange"
replace countyfips = 59 if tax_city=="Lincolnia" & countyname=="Alexandria"
replace fips = 51059 if tax_city=="Lincolnia" & countyname=="Alexandria"
replace countyname = "Fairfax" if tax_city=="Lincolnia" & countyname=="Alexandria"
replace countyfips = 13 if tax_city=="Maplewood" & countyname=="Union"
replace fips = 34013 if tax_city=="Maplewood" & countyname=="Union"
replace countyname = "Essex" if tax_city=="Maplewood" & countyname=="Union"
replace countyfips = 5 if tax_city=="Marlton" & countyname=="Camden"
replace fips = 34005 if tax_city=="Marlton" & countyname=="Camden"
replace countyname = "Burlington" if tax_city=="Marlton" & countyname=="Camden"
replace countyfips = 81 if tax_city=="Menlo Park" & countyname=="Santa Clara"
replace fips = 6081 if tax_city=="Menlo Park" & countyname=="Santa Clara"
replace countyname = "San Mateo" if tax_city=="Menlo Park" & countyname=="Santa Clara"
replace countyfips = 23 if tax_city=="New Brunswick" & countyname=="Somerset"
replace fips = 34023 if tax_city=="New Brunswick" & countyname=="Somerset"
replace countyname = "Middlesex" if tax_city=="New Brunswick" & countyname=="Somerset"
replace countyfips = 71 if tax_city=="New Orleans" & countyname=="Jefferson"
replace fips = 22071 if tax_city=="New Orleans" & countyname=="Jefferson"
replace countyname = "Orleans" if tax_city=="New Orleans" & countyname=="Jefferson"
replace tax_city = "New York" if tax_city=="New York City" & state=="New York"
replace countyfips = 1 if tax_city=="Oakland" & countyname=="Contra Costa"
replace countyfips = 6001 if tax_city=="Oakland" & countyname=="Contra Costa"
replace countyname = "Alameda" if tax_city=="Oakland" & countyname=="Contra Costa"
replace countyfips = 101 if tax_city=="Philadelphia" & countyname=="Delaware"
replace fips = 42101 if tax_city=="Philadelphia" & countyname=="Delaware"
replace countyname = "Philadelphia" if tax_city=="Philadelphia" & countyname=="Delaware"
replace countyfips = 101 if tax_city=="Philadelphia" & countyname=="Montgomery"
replace fips = 42101 if tax_city=="Philadelphia" & countyname=="Montgomery"
replace countyname = "Philadelphia" if tax_city=="Philadelphia" & countyname=="Montgomery"
replace countyfips = 29 if tax_city=="Point Pleasant Beach" & countyname=="Monmouth"
replace fips = 34029 if tax_city=="Point Pleasant Beach" & countyname=="Monmouth"
replace countyname = "Ocean" if tax_city=="Point Pleasant Beach" & countyname=="Monmouth"
replace countyfips = 3 if tax_city=="Rutherford" & countyname=="Passaic"
replace fips = 34003 if tax_city=="Rutherford" & countyname=="Passaic"
replace countyname = "Bergen" if tax_city=="Rutherford" & countyname=="Passaic"
replace countyfips = 59 if tax_city=="Seal Beach" & countyname=="Los Angeles"
replace fips = 6059 if tax_city=="Seal Beach" & countyname=="Los Angeles"
replace countyname = "Orange" if tax_city=="Seal Beach" & countyname=="Los Angeles"
replace countyfips = 35 if tax_city=="Somerset" & countyname=="Middlesex"
replace fips = 34035 if tax_city=="Somerset" & countyname=="Middlesex"
replace countyname = "Somerset" if tax_city=="Somerset" & countyname=="Middlesex"
replace countyfips = 39 if tax_city=="Springfield" & countyname=="Essex"
replace fips = 34039 if tax_city=="Springfield" & countyname=="Essex"
replace countyname = "Union" if tax_city=="Springfield" & countyname=="Essex"
replace countyfips = 39 if tax_city=="Summit" & countyname=="Essex"
replace fips = 34039 if tax_city=="Summit" & countyname=="Essex"
replace countyname = "Union" if tax_city=="Summit" & countyname=="Essex"
replace countyfips = 31 if tax_city=="Takoma Park" & countyname=="Prince George's"
replace fips = 24031 if tax_city=="Takoma Park" & countyname=="Prince George's"
replace countyname = "Montgomery" if tax_city=="Takoma Park" & countyname=="Prince George's"
replace tax_city = "Southampton" if tax_city=="Tuckahoe" & countyname=="Suffolk"
replace countyfips = 5 if tax_city=="Aurora" & countyname=="Denver"
replace fips = 8005 if tax_city=="Aurora" & countyname=="Denver"
replace countyname = "Arapahoe" if tax_city=="Aurora" & countyname=="Denver"
replace countyfips = 25 if tax_city=="Boston" & countyname=="Middlesex"
replace fips = 25025 if tax_city=="Boston" & countyname=="Middlesex"
replace countyname = "Suffolk" if tax_city=="Boston" & countyname=="Middlesex"
replace countyfips = 21 if tax_city=="Brookline" & countyname=="Suffolk"
replace fips = 25021 if tax_city=="Brookline" & countyname=="Suffolk"
replace countyname = "Norfolk" if tax_city=="Brookline" & countyname=="Suffolk"
replace countyfips = 17 if tax_city=="Cambridge" & countyname=="Suffolk"
replace fips = 25017 if tax_city=="Cambridge" & countyname=="Suffolk"
replace countyname = "Middlesex" if tax_city=="Cambridge" & countyname=="Suffolk"
replace countyfips = 85 if tax_city=="Dawsonville" & countyname=="Hall"
replace fips = 13085 if tax_city=="Dawsonville" & countyname=="Hall"
replace countyname = "Dawson" if tax_city=="Dawsonville" & countyname=="Hall"
replace countyfips = 21 if tax_city=="Dedham" & countyname=="Suffolk"
replace fips = 25021 if tax_city=="Dedham" & countyname=="Suffolk"
replace countyname = "Norfolk" if tax_city=="Dedham" & countyname=="Suffolk"
replace tax_city = "Lakewood" if tax_city=="Denver" & countyname=="Jefferson" & missing(gis_city)
replace countyfips = 31 if tax_city=="Denver" & countyname=="Jefferson" & gis_city=="Denver"
replace fips = 8031 if tax_city=="Denver" & countyname=="Jefferson" & gis_city=="Denver"
replace countyname = "Denver" if tax_city=="Denver" & countyname=="Jefferson" & gis_city=="Denver"
replace countyfips = 31 if tax_city=="Denver" & countyname=="Arapahoe" & gis_city=="Denver"
replace fips = 8031 if tax_city=="Denver" & countyname=="Arapahoe" & gis_city=="Denver"
replace countyname = "Denver" if tax_city=="Denver" & countyname=="Arapahoe" & gis_city=="Denver"
replace countyfips = 17 if tax_city=="Everett" & countyname=="Suffolk"
replace fips = 25017 if tax_city=="Everett" & countyname=="Suffolk"
replace countyname = "Middlesex" if tax_city=="Everett" & countyname=="Suffolk"
replace tax_city = "Indianapolis" if tax_city=="Indianapolis city (balance)"
replace tax_city = "Louisville" if tax_city=="Louisville/Jefferson County metro government (balance)"
replace countyfips = 21 if tax_city=="Milton" & countyname=="Suffolk"
replace fips = 25021 if tax_city=="Milton" & countyname=="Suffolk"
replace countyname = "Norfolk" if tax_city=="Milton" & countyname=="Suffolk"
replace countyfips = 53 if tax_city=="Minneapolis" & countyname=="Ramsey"
replace fips = 27053 if tax_city=="Minneapolis" & countyname=="Ramsey"
replace countyname = "Hennepin" if tax_city=="Minneapolis" & countyname=="Ramsey"
replace tax_city = "Nashville" if tax_city=="Nashville-Davidson metropolitan government (balance)"
replace countyfips = 17 if tax_city=="Newton" & countyname=="Norfolk"
replace countyfips = 25017 if tax_city=="Newton" & countyname=="Norfolk"
replace countyname = "Middlesex" if tax_city=="Newton" & countyname=="Norfolk"
replace countyfips = 17 if tax_city=="Newton" & countyname=="Suffolk"
replace countyfips = 25017 if tax_city=="Newton" & countyname=="Suffolk"
replace countyname = "Middlesex" if tax_city=="Newton" & countyname=="Suffolk"
replace countyfips = 25 if tax_city=="Revere" & countyname=="Essex"
replace fips = 25025 if tax_city=="Revere" & countyname=="Essex"
replace countyname = "Suffolk" if tax_city=="Revere" & countyname=="Essex"
replace countyfips = 123 if tax_city=="St. Paul" & countyname=="Dakota"
replace fips = 27123 if tax_city=="St. Paul" & countyname=="Dakota"
replace countyname = "Ramsey" if tax_city=="St. Paul" & countyname=="Dakota"
replace countyfips = 17 if tax_city=="Watertown Town" & countyname=="Suffolk"
replace fips = 25017 if tax_city=="Watertown Town" & countyname=="Suffolk"
replace countyname = "Middlesex" if tax_city=="Watertown Town" & countyname=="Suffolk"
replace tax_city = "Watertown" if tax_city=="Watertown Town" & state=="Massachusetts"
replace countyfips = 21 if tax_city=="Weymouth Town" & countyname=="Plymouth"
replace fips = 25021 if tax_city=="Weymouth Town" & countyname=="Plymouth"
replace countyname = "Norfolk" if tax_city=="Weymouth Town" & countyname=="Plymouth"
replace tax_city = "Weymouth" if tax_city=="Weymouth Town" & state=="Massachusetts"
replace countyfips = 103 if tax_city=="Amityville" & countyname=="Nassau"
replace fips = 36103 if tax_city=="Amityville" & countyname=="Nassau"
replace countyname = "Suffolk" if tax_city=="Amityville" & countyname=="Nassau"
replace countyfips = 69 if tax_city=="Astor" & countyname=="Volusia"
replace fips = 12069 if tax_city=="Astor" & countyname=="Volusia"
replace countyname = "Lake" if tax_city=="Astor" & countyname=="Volusia"
replace state = "Virginia" if countyname=="Alexandria" & state=="District of Columbia" & city=="Alexandria"
replace state = "Virginia" if countyname=="Arlington" & state=="Maryland"
replace state = "Virginia" if countyname=="Arlington" & state=="District of Columbia"
replace state = "New Jersey" if countyname=="Bergen" & city=="Rutherford" & state=="New York"
replace state = "Florida" if countyname=="Broward" & city=="Hallandale" & state=="New York"
replace state = "Georgia" if countyname=="DeKalb" & city=="Atlanta" & state=="Oklahoma"
replace state = "District of Columbia" if countyname=="District of Columbia" & city=="Washington" & state=="California"
replace state = "Massachusetts" if countyname=="Essex" & city=="Gloucester" & state=="Delaware"
replace state = "Virginia" if countyname=="Fairfax" & city=="Potomac" & state=="Maryland" & gis_city=="McLean"
replace state = "New Jersey" if countyname=="Hudson" & city=="Jersey City" & state=="New York"
replace state = "California" if countyname=="Los Angeles" & city=="Beverly Hills" & state=="Michigan"
replace state = "Arizona" if countyname=="Maricopa" & city=="Fountain Hills" & state=="California"
replace state = "Florida" if countyname=="Miami-Dade" & city=="Miami Beach" & state=="Massachusetts"
replace state = "New York" if countyname=="New York" & gis_city=="New York" & state=="New Jersey"
replace state = "New York" if countyname=="New York" & gis_city=="New York" & inlist(state,"Arizona","Illinois","Maryland","Pennsylvania")
replace state = "Pennsylvania" if countyname=="Philadelphia" & gis_city=="Philadelphia" & inlist(state,"Florida","New York")
replace state = "Maryland" if countyname=="Prince George's" & gis_city=="Suitland" & state=="District of Columbia"
replace statefips = 11 if countyname=="Prince George's" & gis_city=="Chillum" & city=="Washington" & state=="District of Columbia"
replace countyfips = 1 if countyname=="Prince George's" & gis_city=="Chillum" & city=="Washington" & state=="District of Columbia"
replace fips = 11001 if countyname=="Prince George's" & gis_city=="Chillum" & city=="Washington" & state=="District of Columbia"
replace tax_city = "Washington" if countyname=="Prince George's" & gis_city=="Chillum" & city=="Washington" & state=="District of Columbia"
replace countyname = "District of Columbia" if countyname=="Prince George's" & gis_city=="Chillum" & city=="Washington" & state=="District of Columbia"
replace countyname = "Essex" if countyname=="Rockingham" & state=="Massachusetts" & tax_city=="Salisbury"
replace countyfips = 103 if city=="Bushkill" & countyname=="Sussex" & state=="Pennsylvania"
replace fips = 42103 if city=="Bushkill" & countyname=="Sussex" & state=="Pennsylvania"
replace countyname = "Pike" if city=="Bushkill" & countyname=="Sussex" & state=="Pennsylvania"
replace statefips = 54 if countyname=="Washington" & city=="Harpers Ferry" & state=="West Virginia"
replace countyfips = 37 if countyname=="Washington" & city=="Harpers Ferry" & state=="West Virginia"
replace fips = 54037 if countyname=="Washington" & city=="Harpers Ferry" & state=="West Virginia"
replace countyname = "Jefferson" if countyname=="Washington" & city=="Harpers Ferry" & state=="West Virginia"
replace state = "Tennessee" if countyname=="Sumner" & city=="Portland" & state=="Kansas"
replace countyfips = 59 if countyname=="Arlington" & tax_city=="Bailey's Crossroads" & state=="Virginia"
replace fips = 51059 if countyname=="Arlington" & tax_city=="Bailey's Crossroads" & state=="Virginia"
replace countyname = "Fairfax" if countyname=="Arlington" & tax_city=="Bailey's Crossroads" & state=="Virginia"
replace countyfips = 1 if countyname=="Contra Costa" & tax_city=="Berkeley" & state=="California"
replace fips = 6001 if countyname=="Contra Costa" & tax_city=="Berkeley" & state=="California"
replace countyname = "Alameda" if countyname=="Contra Costa" & tax_city=="Berkeley" & state=="California"
replace countyfips = 25 if countyname=="Norfolk" & city=="Dorchester" & state=="Massachusetts"
replace fips = 25025 if countyname=="Norfolk" & city=="Dorchester" & state=="Massachusetts"
replace countyname = "Suffolk" if countyname=="Norfolk" & city=="Dorchester" & state=="Massachusetts"
replace tax_city="Brookline" if state=="Massachusetts" & city=="Chestnut Hill" & countyname=="Norfolk" & gis_city=="Boston"
replace countyfips = 21 if countyname=="Middlesex" & gis_city=="Brookline" & state=="Massachusetts"
replace fips = 25021 if countyname=="Middlesex" & gis_city=="Brookline" & state=="Massachusetts"
replace countyname = "Norfolk" if countyname=="Middlesex" & gis_city=="Brookline" & state=="Massachusetts"
replace countyfips = 14 if !inlist(countyname,"Broomfield","Jefferson") & state=="Colorado" & tax_city=="Broomfield"
replace fips = 8014 if !inlist(countyname,"Broomfield","Jefferson") & state=="Colorado" & tax_city=="Broomfield"
replace countyname = "Broomfield" if !inlist(countyname,"Broomfield","Jefferson") & state=="Colorado" & tax_city=="Broomfield"
replace tax_city = "San Jose" if tax_city=="Burbank" & countyname=="Santa Clara" & state=="California" & city=="San Jose"
replace countyfips = 119 if tax_city=="Charlotte" & state=="North Carolina" & countyname!="Mecklenburg" & city=="Charlotte"
replace fips = 37119 if tax_city=="Charlotte" & state=="North Carolina" & countyname!="Mecklenburg" & city=="Charlotte"
replace countyname = "Mecklenburg" if tax_city=="Charlotte" & state=="North Carolina" & countyname!="Mecklenburg" & city=="Charlotte"
replace tax_city = "Rockwall" if gis_city=="Dallas" & state=="Texas" & countyname=="Rockwall" & city=="Rockwall"
replace countyfips = 31 if gis_city=="Denver" & city=="Denver" & countyname=="Adams" & state=="Colorado"
replace fips = 8031 if gis_city=="Denver" & city=="Denver" & countyname=="Adams" & state=="Colorado"
replace countyname = "Denver" if gis_city=="Denver" & city=="Denver" & countyname=="Adams" & state=="Colorado"
replace tax_city = city if gis_city=="Fort Worth" & state=="Texas" & countyname=="Denton"
replace countyfips = 69 if tax_city=="Four Corners" & state=="Florida" & countyname=="Polk" & city=="Clermont"
replace fips = 12069 if tax_city=="Four Corners" & state=="Florida" & countyname=="Polk" & city=="Clermont"
replace countyname = "Lake" if tax_city=="Four Corners" & state=="Florida" & countyname=="Polk" & city=="Clermont"
replace countyfips = 59 if tax_city=="Fullerton" & countyname=="Los Angeles" & state=="California"
replace fips = 6059 if tax_city=="Fullerton" & countyname=="Los Angeles" & state=="California"
replace countyname = "Orange" if tax_city=="Fullerton" & countyname=="Los Angeles" & state=="California"
replace tax_city = "Spring" if city=="Spring" & countyname=="Montgomery" & state=="Texas" & gis_city=="Houston" & tax_city=="Houston"
replace tax_city = city if gis_city=="Houston" & tax_city=="Houston" & countyname=="Fort Bend" & state=="Texas"
replace countyfips = 97 if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hamilton" & !missing(gis_city)
replace fips = 18097 if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hamilton" & !missing(gis_city)
replace countyname = "Marion" if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hamilton" & !missing(gis_city)
replace countyfips = 97 if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hendricks" & !missing(gis_city)
replace fips = 18097 if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hendricks" & !missing(gis_city)
replace countyname = "Marion" if tax_city=="Indianapolis" & state=="Indiana" & countyname=="Hendricks" & !missing(gis_city)
replace tax_city = city if gis_city=="Lakewood" & countyname=="Denver" & state=="Colorado"
replace countyfips = 59 if gis_city=="Lakewood" & countyname=="Denver" & state=="Colorado" & city=="Littleton"
replace fips = 8059 if gis_city=="Lakewood" & countyname=="Denver" & state=="Colorado" & city=="Littleton"
replace countyname = "Jefferson" if gis_city=="Lakewood" & countyname=="Denver" & state=="Colorado" & city=="Littleton"
replace countyfips = 111 if tax_city=="Louisville" & countyname=="Bullitt" & state=="Kentucky"
replace fips = 21111 if tax_city=="Louisville" & countyname=="Bullitt" & state=="Kentucky"
replace countyname = "Jefferson" if tax_city=="Louisville" & countyname=="Bullitt" & state=="Kentucky"
replace countyfips = 85 if tax_city=="Milpitas" & state=="California" & countyname=="Alameda"
replace fips = 6085 if tax_city=="Milpitas" & state=="California" & countyname=="Alameda"
replace countyname = "Santa Clara" if tax_city=="Milpitas" & state=="California" & countyname=="Alameda"
replace tax_city = "Martinez" if gis_city=="Mountain View" & countyname=="Contra Costa" & state=="California"
replace countyfips = 37 if tax_city=="Nashville" & countyname=="Sumner" & city=="Goodlettsville" & state=="Tennessee"
replace fips = 47037 if tax_city=="Nashville" & countyname=="Sumner" & city=="Goodlettsville" & state=="Tennessee"
replace tax_city = "Goodlettsville" if tax_city=="Nashville" & countyname=="Sumner" & city=="Goodlettsville" & state=="Tennessee"
replace countyname = "Davidson" if tax_city=="Goodlettsville" & countyname=="Sumner" & city=="Goodlettsville" & state=="Tennessee" & gis_city=="Nashville-Davidson metropolitan government (balance)"
replace countyfips = 37 if tax_city=="Nashville" & countyname=="Williamson" & city=="Nolensville" & state=="Tennessee"
replace fips = 47037 if tax_city=="Nashville" & countyname=="Williamson" & city=="Nolensville" & state=="Tennessee"
replace tax_city = "Nolensville" if tax_city=="Nashville" & countyname=="Williamson" & city=="Nolensville" & state=="Tennessee"
replace countyname = "Davidson" if tax_city=="Nolensville" & countyname=="Williamson" & city=="Nolensville" & state=="Tennessee" & gis_city=="Nashville-Davidson metropolitan government (balance)"
replace countyfips = 81 if tax_city=="New York" & countyname=="Nassau" & state=="New York"
replace fips = 36081 if tax_city=="New York" & countyname=="Nassau" & state=="New York"
replace countyname = "Queens" if tax_city=="New York" & countyname=="Nassau" & state=="New York"
replace countyfips = 5 if tax_city=="New York" & countyname=="Westchester" & state=="New York"
replace fips = 36005 if tax_city=="New York" & countyname=="Westchester" & state=="New York"
replace countyname = "Bronx" if tax_city=="New York" & countyname=="Westchester" & state=="New York"
replace countyfips = 13 if tax_city=="Newark" & countyname=="Union" & state=="New Jersey"
replace fips = 34013 if tax_city=="Newark" & countyname=="Union" & state=="New Jersey"
replace countyname = "Essex" if tax_city=="Newark" & countyname=="Union" & state=="New Jersey"
replace countyfips = 17 if tax_city=="North Bergen" & countyname=="Bergen" & state=="New Jersey"
replace fips = 34017 if tax_city=="North Bergen" & countyname=="Bergen" & state=="New Jersey"
replace countyname = "Hudson" if tax_city=="North Bergen" & countyname=="Bergen" & state=="New Jersey"
replace countyfips = 85 if tax_city=="Palo Alto" & countyname=="San Mateo" & state=="California"
replace fips = 6085 if tax_city=="Palo Alto" & countyname=="San Mateo" & state=="California"
replace countyname = "Santa Clara" if tax_city=="Palo Alto" & countyname=="San Mateo" & state=="California"
replace tax_city = "Carrollton" if tax_city=="Plano" & countyname=="Denton" & state=="Texas" & city=="Carrollton"
replace countyfips = 491 if tax_city=="Round Rock" & countyname=="Travis" & state=="Texas" & city=="Austin"
replace fips = 48491 if tax_city=="Round Rock" & countyname=="Travis" & state=="Texas" & city=="Austin"
replace countyname = "Williamson" if tax_city=="Round Rock" & countyname=="Travis" & state=="Texas" & city=="Austin"
replace tax_city = "Austin" if tax_city=="Round Rock" & countyname=="Williamson" & state=="Texas" & city=="Austin" & gis_city=="Round Rock"
replace tax_city = "Park City" if tax_city=="Salt Lake City" & countyname=="Summit" & state=="Utah"
replace countyfips = 59 if tax_city=="San Clemente" & countyname=="San Diego" & state=="California" & g_county=="Orange County"
replace fips = 6059 if tax_city=="San Clemente" & countyname=="San Diego" & state=="California" & g_county=="Orange County"
replace countyname = "Orange" if tax_city=="San Clemente" & countyname=="San Diego" & state=="California" & g_county=="Orange County"
replace countyfips = 121 if tax_city=="Sandy Springs" & countyname=="DeKalb" & state=="Georgia"
replace fips = 13121 if tax_city=="Sandy Springs" & countyname=="DeKalb" & state=="Georgia"
replace countyname = "Fulton" if tax_city=="Sandy Springs" & countyname=="DeKalb" & state=="Georgia"

save combined_property_file_cleaned.dta, replace

********************************************************************************
*convert property INFO files into stata files, then append them into one combined file

set more off

foreach x in Anchorage Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego SanJose Seattle WashingtonDC {

*capture noisily cd "$propinfo"

capture noisily import delimited `x'PropertyFile.csv, bindquotes(strict) clear

capture noisily tostring neighborhood, replace

capture noisily save `x'_property_file.dta, replace

}

*****
*now append

clear

use Anchorage_property_file.dta, clear

foreach x in Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego SanJose Seattle WashingtonDC {

capture noisily append using `x'_property_file.dta

}

drop if missing(propertyid)

bysort propertyid: gen dupindex = _n
drop if dupindex>1
drop dupindex

save combined_property_info_file.dta, replace

**********************
**importing daily metro files to convert to property-month by metro files

clear
set more off

foreach x in Anchorage Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego SanJose Seattle WashingtonDC {

import delimited `x'_MSA_Daily.csv

drop if missing(propertyid)

gen obsdate = date(date, "YMD")
format obsdate %td
gen obsyear = year(obsdate)
gen obsmonth = month(obsdate)
gen obsweek = week(obsdate)
gen obsyw = yw(obsyear,obsweek)
format obsyw %tw
gen obsym = ym(obsyear,obsmonth)
format obsym %tm

sort propertyid obsdate

gen bookdate = date(bookeddate, "YMD")
format bookdate %td

*if status is reserved and it is missing a bookeddate, indicated by the following variable:
gen missingbookdate = 0
replace missingbookdate = 1 if status=="R" & missing(bookeddate)

sort propertyid obsdate

*if bookeddate is missing but there's a reservation id, then we assign the bookdate as one day earlier
bysort propertyid: replace bookdate = obsdate - 1 if missingbookdate==1 & !missing(reservationid)

sort propertyid obsdate

/*if bookeddate is missing for a given reservation id, minbookdate is assigned based on the earliest 
bookdate assigned to that reservationid in the previous step.  If the reservationid is missing but
is assigned a bookdate, that bookdate will be replaced by the obs date after the replace commands 
roughly 20 lines below.
*/
bysort propertyid status reservationid: egen minbookdate = min(bookdate)
format minbookdate %td
replace bookdate = minbookdate if missingbookdate==1 & !missing(minbookdate)

sort propertyid obsdate

gen bookyear = year(bookdate)
gen bookmonth = month(bookdate)
gen bookweek = week(bookdate)
gen bookyw = yw(bookyear,bookweek)
format bookyw %tw
gen bookym = ym(bookyear,bookmonth)
format bookym %tm

*a listing is considered booked if its status is reserved and it is not missing a bookdate nor a reservationid
gen booked = 0
replace booked = 1 if status=="R" & !missing(bookdate) & !missing(reservationid)

*reflects the price of the listing on days that it is considered booked
gen bookprice = price if status=="R" & !missing(bookdate) & !missing(reservationid)

/*replaces bookym and bookyw with the observation month or week for observations without bookings/bookdate 
info (for collapse purposes)*/
replace bookym = obsym if missing(bookym) | booked==0
replace bookyw = obsyw if missing(bookyw) | booked==0

** also generate a count of the number of unique reservations
bysort propertyid reservationid: gen numbookings=_n==1 if reservationid!=.

collapse (sum) booked numbookings (mean) bookprice, by(propertyid bookym)

fillin propertyid bookym

drop if missing(bookym)

*this process removes the filled in months for a property that fall outside of the observed first and last month
gen intra_fillin = _fillin
bysort propertyid: replace intra_fillin = 0 if intra_fillin==1 & intra_fillin[_n-1]==0

gen negbookym = -bookym
sort propertyid negbookym
gen intra_fillin2 = _fillin
bysort propertyid: replace intra_fillin2 = 0 if intra_fillin2==1 & intra_fillin2[_n-1]==0

sort propertyid bookym

gen intra_fillin_sum = intra_fillin + intra_fillin2
drop if intra_fillin_sum==1

*replacing booked = 0 for filled in observations so that they're included in analysis of bookings
replace booked=0 if _fillin==1

save `x'_monthly.dta, replace
clear

}

** now append each metro- book month file to get a combined property- book month panel

use Anchorage_monthly, clear
gen metro = "Anchorage"

foreach x in Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego SanJose Seattle WashingtonDC {

append using `x'_monthly
replace metro = "`x'" if missing(metro)

}

merge m:1 propertyid using "$propgis/combined_property_file_cleaned.dta", ///
keepusing(state city tax_city zipcode neighborhood latitude longitude statefips countyfips fips countyname waloccode nonuscoords)

drop if _merge!=3

save combined_monthly.dta, replace


********************************************************************************
*redo the above, collapsing instead by observation date to generate measures of supply and asking price

clear
set more off

foreach x in Anchorage Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego SanJose Seattle WashingtonDC {

import delimited `x'_MSA_Daily.csv

drop if missing(propertyid)

gen obsdate = date(date, "YMD")
format obsdate %td
gen obsyear = year(obsdate)
gen obsmonth = month(obsdate)
gen obsweek = week(obsdate)
gen obsyw = yw(obsyear,obsweek)
format obsyw %tw
gen obsym = ym(obsyear,obsmonth)
format obsym %tm

sort propertyid obsdate

gen bookdate = date(bookeddate, "YMD")
format bookdate %td

gen missingbookdate = 0
replace missingbookdate = 1 if status=="R" & missing(bookeddate)

sort propertyid obsdate

bysort propertyid: replace bookdate = obsdate - 1 if missingbookdate==1 & !missing(reservationid)

sort propertyid obsdate

bysort propertyid status reservationid: egen minbookdate = min(bookdate)
format minbookdate %td
replace bookdate = minbookdate if missingbookdate==1 & !missing(minbookdate)

sort propertyid obsdate

gen booked = 0
replace booked = 1 if status=="R" & !missing(bookdate) & !missing(reservationid)

gen bookprice = price if status=="R" & !missing(bookdate) & !missing(reservationid)

gen supply = 1
replace supply = 0 if status=="B"

gen askingprice = price if supply==1

collapse (sum) supply (mean) askingprice, by(propertyid obsym)

fillin propertyid obsym

drop if missing(obsym)

*this process removes the filled in months for a property that fall outside of the observed first and last month
gen intra_fillin = _fillin
bysort propertyid: replace intra_fillin = 0 if intra_fillin==1 & intra_fillin[_n-1]==0

gen negobsym = -obsym
sort propertyid negobsym
gen intra_fillin2 = _fillin
bysort propertyid: replace intra_fillin2 = 0 if intra_fillin2==1 & intra_fillin2[_n-1]==0

sort propertyid obsym

gen intra_fillin_sum = intra_fillin + intra_fillin2
drop if intra_fillin_sum==1

*replacing supply = 0 for filled in observations so that they're included in analysis of supply
replace supply=0 if _fillin==1

save `x'_monthly_obsym.dta, replace
clear

}

**now append all these files for later merge that adds in asking price and supply variables

use Anchorage_monthly_obsym, clear
gen metro = "Anchorage"

foreach x in Atlanta Austin Boston Charlotte Chicago Cleveland Denver Dallas Houston Indianapolis LosAngeles Louisville ///
Miami Minneapolis Nashville NewOrleans NewYork Oakland Orlando Philadelphia Phoenix SaltLakeCity SanDiego ///
SanJose Seattle WashingtonDC {

append using `x'_monthly_obsym
replace metro = "`x'" if missing(metro)

}

save combined_monthly_obsym.dta, replace


***********************************************
***converting tax spreadsheets to stata

import excel taxdata_1_4_18.xlsx, sheet("state only") firstrow case(lower) clear

drop if missing(stateonlytax)

expand 39
sort state
bysort state stateonlytaxdate: gen index = _n
gen obsym = 653 + index
format obsym %tm
drop index
gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen stateonlytaxyear = year(stateonlytaxdate)
gen stateonlytaxmonth = month(stateonlytaxdate)
gen stateonlytaxday = day(stateonlytaxdate)
gen stateonlytaxym = ym(stateonlytaxyear,stateonlytaxmonth)
format stateonlytaxym %tm

gen stateonlytaxdummy = 0 if !missing(stateonlytaxdate)
replace stateonlytaxdummy = 1 if stateonlytaxym<obsym
replace stateonlytaxdummy = (32-stateonlytaxday)/31 if stateonlytaxym==obsym
replace stateonlytaxdummy = (31-stateonlytaxday)/30 if stateonlytaxym==obsym & inlist(obsmonth,9,4,6,11)
replace stateonlytaxdummy = (29-stateonlytaxday)/28 if stateonlytaxym==obsym & obsmonth==2

gen stateonlytaxvar = stateonlytax*stateonlytaxdummy
collapse (sum) stateonlytaxvar, by(state obsym)

bysort state: egen firsttax = min(obsym) if stateonlytaxvar>0
bysort state: egen statetaxintro = min(firsttax)
drop firsttax

save stateonlytaxes.dta, replace

**

import excel taxdata_1_4_18.xlsx, sheet("state-county only") firstrow case(lower) clear

rename county countyname
keep state countyname statecountytax statecountytaxdate
drop if missing(statecountytax)

expand 39
sort state countyname statecountytaxdate
bysort state countyname statecountytaxdate: gen index = _n
gen obsym = 653 + index
format obsym %tm
drop index
gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen statecountytaxyear = year(statecountytaxdate)
gen statecountytaxmonth = month(statecountytaxdate)
gen statecountytaxday = day(statecountytaxdate)
gen statecountytaxym = ym(statecountytaxyear,statecountytaxmonth)
format statecountytaxym %tm

gen statecountytaxdummy = 0 if !missing(statecountytaxdate)
replace statecountytaxdummy = 1 if statecountytaxym<obsym
replace statecountytaxdummy = (32-statecountytaxday)/31 if statecountytaxym==obsym
replace statecountytaxdummy = (31-statecountytaxday)/30 if statecountytaxym==obsym & inlist(obsmonth,9,4,6,11)
replace statecountytaxdummy = (29-statecountytaxday)/28 if statecountytaxym==obsym & obsmonth==2

gen statecountytaxvar = statecountytax*statecountytaxdummy
collapse (sum) statecountytaxvar, by(state countyname obsym)

bysort state countyname: egen firsttax = min(obsym) if statecountytaxvar>0
bysort state countyname: egen statecountytaxintro = min(firsttax)
drop firsttax

save statecountyonlytaxes.dta, replace

**

import excel taxdata_1_4_18.xlsx, sheet("state-city") firstrow case(lower) clear

keep city state statecitytax statecitytaxdate
drop if missing(statecitytax)

expand 39
sort state city statecitytaxdate
bysort state city statecitytaxdate: gen index = _n
gen obsym = 653 + index
format obsym %tm
drop index
gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen statecitytaxyear = year(statecitytaxdate)
gen statecitytaxmonth = month(statecitytaxdate)
gen statecitytaxday = day(statecitytaxdate)
gen statecitytaxym = ym(statecitytaxyear,statecitytaxmonth)
format statecitytaxym %tm

gen statecitytaxdummy = 0 if !missing(statecitytaxdate)
replace statecitytaxdummy = 1 if statecitytaxym<obsym
replace statecitytaxdummy = (32-statecitytaxday)/31 if statecitytaxym==obsym
replace statecitytaxdummy = (31-statecitytaxday)/30 if statecitytaxym==obsym & inlist(obsmonth,9,4,6,11)
replace statecitytaxdummy = (29-statecitytaxday)/28 if statecitytaxym==obsym & obsmonth==2

gen statecitytaxvar = statecitytax*statecitytaxdummy
collapse (sum) statecitytaxvar, by(state city obsym)

gen tax_city = city

bysort state tax_city: egen firsttax = min(obsym) if statecitytaxvar!=0
bysort state tax_city: egen statecitytaxintro = min(firsttax)
drop firsttax

save statecitytaxes.dta, replace

**

import excel taxdata_1_4_18.xlsx, sheet("state-county-city") firstrow case(lower) clear

rename county countyname
keep city state county statecountycitytax statecountycitytaxdate
drop if missing(statecountycitytax)

expand 39
sort state countyname city statecountycitytaxdate
bysort state countyname city statecountycitytaxdate: gen index = _n
gen obsym = 653 + index
format obsym %tm
drop index
gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen statecountycitytaxyear = year(statecountycitytaxdate)
gen statecountycitytaxmonth = month(statecountycitytaxdate)
gen statecountycitytaxday = day(statecountycitytaxdate)
gen statecountycitytaxym = ym(statecountycitytaxyear,statecountycitytaxmonth)
format statecountycitytaxym %tm

gen statecountycitytaxdummy = 0 if !missing(statecountycitytaxdate)
replace statecountycitytaxdummy = 1 if statecountycitytaxym<obsym
replace statecountycitytaxdummy = (32-statecountycitytaxday)/31 if statecountycitytaxym==obsym
replace statecountycitytaxdummy = (31-statecountycitytaxday)/30 if statecountycitytaxym==obsym & inlist(obsmonth,9,4,6,11)
replace statecountycitytaxdummy = (29-statecountycitytaxday)/28 if statecountycitytaxym==obsym & obsmonth==2

gen statecountycitytaxvar = statecountycitytax*statecountycitytaxdummy
collapse (sum) statecountycitytaxvar, by(state countyname city obsym)

gen tax_city = city

bysort state countyname tax_city: egen firsttax = min(obsym) if statecountycitytaxvar!=0
bysort state countyname tax_city: egen statecountycitytaxintro = min(firsttax)
drop firsttax

save statecountycitytaxes.dta, replace

***
*Seattle metro tax file

import excel seattle_tax_by_loccode_1_4_18.xlsx, sheet("Table 1") firstrow case(lower) clear

rename totaltax watax

expand 39
sort waloccode
bysort waloccode wataxdate: gen index = _n
gen obsym = 653 + index
format obsym %tm
drop index
gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen wataxyear = year(wataxdate)
gen wataxmonth = month(wataxdate)
gen wataxday = day(wataxdate)
gen wataxym = ym(wataxyear,wataxmonth)
format wataxym %tm

gen wataxdummy = 0 if !missing(wataxdate)
replace wataxdummy = 1 if wataxym<obsym
replace wataxdummy = (32-wataxday)/31 if wataxym==obsym
replace wataxdummy = (31-wataxday)/30 if wataxym==obsym & inlist(obsmonth,9,4,6,11)
replace wataxdummy = (29-wataxday)/28 if wataxym==obsym & obsmonth==2

gen wataxvar = watax*wataxdummy
collapse (max) wataxvar, by(waloccode obsym)

bysort waloccode: egen firsttax = min(obsym) if wataxvar!=0
bysort waloccode: egen wataxintro = min(firsttax)
drop firsttax

save seattle_tax_by_loccode.dta, replace


*******************************
*merging on tax and property info, and creating final property-book month panel

clear
set more off

use combined_monthly.dta, clear

drop if missing(tax_city)
drop if missing(countyname)
drop if missing(state)

gen obsym = bookym

merge m:1 state tax_city obsym using statecitytaxes.dta, gen(_mergecitytax)
drop if _mergecitytax==2

merge m:1 state countyname tax_city obsym using statecountycitytaxes.dta, gen(_mergecountycitytax)
drop if _mergecountycitytax==2

merge m:1 state countyname obsym using statecountyonlytaxes.dta, gen(_mergecountytax)
drop if _mergecountytax==2

merge m:1 state obsym using stateonlytaxes.dta, gen(_mergestatetax)
drop if _mergestatetax==2

destring waloccode, replace
replace waloccode = 5094 if waloccode==1704
replace waloccode = 5106 if waloccode==2706
replace waloccode = 5111 if waloccode==2711
replace waloccode = 5121 if waloccode==2721
merge m:1 waloccode obsym using seattle_tax_by_loccode.dta, gen(_mergewatax)
drop if _mergewatax==2

*drops july 2014 and October 2017 (sparse data, tail ends of sample)
drop if inlist(obsym,654,693)

*dropping b/c not useful or don't have good tax info on it
drop if state=="Puerto Rico"
drop if tax_city=="Palo Alto"
drop if tax_city=="Santa Monica"
drop if nonuscoords==1

merge m:1 propertyid using "combined_property_info_file.dta", gen(_mergeinfo) ///
keepusing(hostid propertytype listingtype createddate lastscrapeddate numberofreviews overallrating bedrooms ///
bathrooms maxguests responserate responsetimemin superhost cancellationpolicy securitydeposit cleaningfee ///
extrapeoplefee averagedailyrate occupancyrateltm publishednightlyrate publishedmonthlyrate publishedweeklyrate ///
checkintime checkouttime minimumstay numberofphotos countreservationdaysltm countavailabledaysltm countblockeddaysltm ///
businessready instantbookenabled)
drop if _mergeinfo==2

merge 1:1 propertyid obsym using "combined_monthly_obsym.dta", keepusing(askingprice supply) gen(_mergeobsym)
drop if _mergeobsym==2

replace supply=0 if _mergeobsym==1

bysort propertyid obsym: gen dupindex = _n
drop if dupindex>1
drop dupindex

replace statecountycitytaxvar = 0 if missing(statecountycitytaxvar)
replace statecitytaxvar = 0 if missing(statecitytaxvar)
replace statecountytaxvar = 0 if missing(statecountytaxvar)
replace stateonlytaxvar = 0 if missing(stateonlytaxvar)

gen taxvar = statecountycitytaxvar + statecitytaxvar + statecountytaxvar + stateonlytaxvar
replace taxvar = wataxvar if taxvar==0 & metro=="Seattle"

egen firsttax = rowmin(statecountycitytaxintro statecitytaxintro statecountytaxintro statetaxintro wataxintro)
bysort propertyid: egen taxintro = min(firsttax)
drop firsttax

sort propertyid obsym

gen taxrate = taxvar/100
gen lntaxrate = ln(1+taxrate)

encode countyname, gen(countyname1)
encode state, gen(state1)
encode tax_city, gen(tax_city1)

gen time = obsym-654

gen obsdate = dofm(obsym)
gen obsmonth = month(obsdate)

gen obsquarter = 1 if inlist(obsmonth,1,2,3)
replace obsquarter = 2 if inlist(obsmonth,4,5,6)
replace obsquarter = 3 if inlist(obsmonth,7,8,9)
replace obsquarter = 4 if inlist(obsmonth,10,11,12)

gen bedgrp = 0 if missing(bedrooms)
replace bedgrp = 1 if bedrooms<2
replace bedgrp = 2 if bedrooms>=2 & !missing(bedrooms)

gen startdate = date(createddate, "YMD")
replace startdate = date(createddate, "MDY") if missing(startdate)
format startdate %td
gen startyear = year(startdate)
gen startmonth = month(startdate)
gen startym = ym(startyear,startmonth)
format startym %tm

replace cleaningfee = 0 if missing(cleaningfee)
replace extrapeoplefee = 0 if missing(extrapeoplefee)
replace securitydeposit = 0 if missing(securitydeposit)

sort propertyid bookym

bysort propertyid: gen index = _n
bysort propertyid: egen numofmonths = max(index)
bysort propertyid: egen maxtaxrate = max(taxvar)

gen superhost1 = .
replace superhost1 = 0 if superhost=="False"
replace superhost1 = 1 if superhost=="True"

gen businessready1 = .
replace businessready1 = 0 if businessready=="False"
replace businessready1 = 1 if businessready=="True"

sort propertyid bookym

compress

save combined_monthly_analysis.dta, replace

*********
***End***
*********



********************************************************************************
** CREATE ANALYSIS DATA SETS FROM THE INTERMEDIATE DATA FILES CREATED ABOVE
** ONE FOR THE MAIN ANALYSIS AND TWO FOR ROBUSTNESS CHECKS

**** UNRESTRICTED "FULL" DATA SET

use "combined_monthly_analysis.dta", clear

*Keeping jurisdictions for analysis
keep if ///
(tax_city=="New York" & countyname=="New York" & state=="New York") | ///
(tax_city=="Los Angeles" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="New York" & countyname=="Kings" & state=="New York") | ///
(tax_city=="San Francisco" & countyname=="San Francisco" & state=="California") | ///
(tax_city=="Austin" & countyname=="Travis" & state=="Texas") | ///
(tax_city=="Chicago" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Philadelphia" & countyname=="Philadelphia" & state=="Pennsylvania") | ///
(tax_city=="San Diego" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Miami Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Houston" & countyname=="Harris" & state=="Texas") | ///
(tax_city=="Washington" & countyname=="District of Columbia" & state=="District of Columbia") | ///
(tax_city=="Seattle" & countyname=="King" & state=="Washington") | ///
(tax_city=="Boston" & countyname=="Suffolk" & state=="Massachusetts") | ///
(tax_city=="New Orleans" & countyname=="Orleans" & state=="Louisiana") | ///
(tax_city=="New York" & countyname=="Queens" & state=="New York") | ///
(tax_city=="Nashville" & countyname=="Davidson" & state=="Tennessee") | ///
(tax_city=="Miami" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Denver" & countyname=="Denver" & state=="Colorado") | ///
(tax_city=="Four Corners" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Atlanta" & countyname=="Fulton" & state=="Georgia") | ///
(tax_city=="San Jose" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Kissimmee" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Oakland" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Cambridge" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Phoenix" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Orlando" & countyname=="Orange" & state=="Florida") | ///
(tax_city=="Dallas" & countyname=="Dallas" & state=="Texas") | ///
(tax_city=="Jersey City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Berkeley" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Scottsdale" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Indianapolis" & countyname=="Marion" & state=="Indiana") | ///
(tax_city=="Louisville" & countyname=="Jefferson" & state=="Kentucky") | ///
(tax_city=="Fort Lauderdale" & countyname=="Broward" & state=="Florida") | ///
(tax_city=="West Hollywood" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Hollywood" & countyname=="Broward" & state=="Florida") | ///
(tax_city=="Salt Lake City" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Minneapolis" & countyname=="Hennepin" & state=="Minnesota") | ///
(tax_city=="Sunny Isles Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Santa Clara" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Arlington" & countyname=="Arlington" & state=="Virginia") | ///
(tax_city=="Cleveland" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Long Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Irvine" & countyname=="Orange" & state=="California") | ///
(tax_city=="Mountain View" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Somerville" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Sunnyvale" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Charlotte" & countyname=="Mecklenburg" & state=="North Carolina") | ///
(tax_city=="Newport Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="New York" & countyname=="Bronx" & state=="New York") | ///
(tax_city=="Galveston" & countyname=="Galveston" & state=="Texas") | ///
(tax_city=="Anchorage" & countyname=="Anchorage" & state=="Alaska") | ///
(tax_city=="Anaheim" & countyname=="Orange" & state=="California") | ///
(tax_city=="Four Corners" & countyname=="Lake" & state=="Florida") | ///
(tax_city=="Pasadena" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Encinitas" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Beverly Hills" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Oceanside" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Malibu" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Tacoma" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Brookline" & countyname=="Norfolk" & state=="Massachusetts") | ///
(tax_city=="Huntington Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="Carlsbad" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Tempe" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Hoboken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="San Mateo" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="West Palm Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Pompano Beach" & countyname=="Broward" & state=="Florida") | ///
(tax_city=="Hallandale Beach" & countyname=="Broward" & state=="Florida") | ///
(tax_city=="Alexandria" & countyname=="Alexandria" & state=="Virginia") | ///
(tax_city=="Atlanta" & countyname=="DeKalb" & state=="Georgia") | ///
(tax_city=="Fort Worth" & countyname=="Tarrant" & state=="Texas") | ///
(tax_city=="Bellevue" & countyname=="King" & state=="Washington") | ///
(tax_city=="Union City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Redwood City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Fremont" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Mesa" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Redondo Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Delray Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Hermosa Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="San Clemente" & countyname=="Orange" & state=="California") | ///
(tax_city=="Glendale" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Menlo Park" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Rowland Heights" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="St. Paul" & countyname=="Ramsey" & state=="Minnesota") | ///
(tax_city=="Culver City" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Aurora" & countyname=="Arapahoe" & state=="Colorado") | ///
(tax_city=="Boca Raton" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Aventura" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Laguna Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="Doral" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="New York" & countyname=="Richmond" & state=="New York") | ///
(tax_city=="Burbank" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Manhattan Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Weehawken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Evanston" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Milpitas" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Daly City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Lakewood" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Costa Mesa" & countyname=="Orange" & state=="California") | ///
(tax_city=="Sandy" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Newton" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Franklin" & countyname=="Williamson" & state=="Tennessee") | ///
(tax_city=="Kirkland" & countyname=="King" & state=="Washington") | ///
(tax_city=="Alameda" & countyname=="Alameda" & state=="California") | ///
(tax_city=="West New York" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Newark" & countyname=="Essex" & state=="New Jersey") | ///
(tax_city=="Silver Spring" & countyname=="Montgomery" & state=="Maryland") | ///
(tax_city=="Cleveland Heights" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Bethesda" & countyname=="Montgomery" & state=="Maryland") | ///
(tax_city=="University Place" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Richmond" & countyname=="Contra Costa" & state=="California") | ///
(tax_city=="Oak Park" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Lakewood" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Millcreek" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Redmond" & countyname=="King" & state=="Washington") | ///
(tax_city=="Metairie" & countyname=="Jefferson" & state=="Louisiana") | ///
(tax_city=="Golden" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Vashon" & countyname=="King" & state=="Washington") 

*this reassigns 23 properties (updated 1-19-18)
replace metro="Oakland" if countyname=="San Mateo" & metro=="SanJose"

*this reassigns 16 properties (updated 2-26-18)
replace metro="LosAngeles" if countyname=="Orange" & metro=="SanDiego"

egen taxjuris = group(tax_city countyname state)
egen countyid = group(countyname state)

sort propertyid obsym

keep if bedrooms<=4
keep if maxguests<=12
drop if listingtype=="Shared room"

*generating mean ask price by property id for dropping of outliers
bysort propertyid: egen meanaskprice = mean(askingprice)
gen meanaskprice1 = meanaskprice
replace meanaskprice1 = . if index!=1

*generating percentiles to next drop properties with outlying prices
bysort taxjuris: egen ask_pctile10 = pctile(meanaskprice1), p(10)
bysort taxjuris: egen ask_pctile90 = pctile(meanaskprice1), p(90)

*only keep those with prices between 10 and 90 percentile of their tax jurisdiction
keep if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

drop meanaskprice

sort propertyid obsym

gen askpricewithtax = askingprice*(1+(taxvar/100))
gen bookpricewithtax = bookprice*(1+(taxvar/100))

*logging variables of interest
gen lntaxvar = ln(1 + (taxvar/100))
gen lnbookprice = ln(bookprice)
gen lnaskingprice = ln(askingprice)
gen lnbookpricewithtax = ln(bookpricewithtax)
gen lnaskpricewithtax = ln(askpricewithtax)

*do 1 + supply and 1 + booked to deal with zeros.  could also use poisson.
gen lnsupply = ln(1 + supply)
gen lnbooked = ln(1 + booked)

*entry/exit
bysort propertyid: egen lastmonth = max(obsym)
bysort propertyid: egen firstmonth = min(obsym)
*655 is first month of data
gen entry = 0 if obsym!=655
replace entry = 1 if entry==0 & obsym==firstmonth
*692 is last month of data
gen exit = 0 if obsym!=692
replace exit = 1 if exit==0 & obsym==lastmonth

** include a second definition of exit, last month with positive supply
bysort propertyid: egen lastsupply = max(obsym) if supply>0
bysort propertyid: egen lastsupply2 = min(lastsupply)
gen exit2 = 0
replace exit2 = 1 if obsym==lastsupply2 & lastsupply2!=692
replace exit2=. if obsym>lastsupply2 

gen date = dofm(obsym)
format date %td
gen obsyear = year(date)
drop date

encode metro, gen(metro1)

encode cancellationpolicy, gen(cancel)
replace cancel = 99 if missing(cancellationpolicy)

gen rated = 0
replace rated = 1 if !missing(overallrating)
gen rating1 = overallrating*10
replace rating1 = 99 if missing(overallrating)
gen rating1sq = rating1^2

gen bathrooms1 = bathrooms*2
replace bathrooms1 = 8 if bathrooms1>8 & !missing(bathrooms1)
replace bathrooms1 = 99 if missing(bathrooms)

gen maxguests1 = maxguests
replace maxguests1 = 8 if maxguests1>8 & !missing(maxguests1)

gen minimumstay1 = minimumstay
replace minimumstay1 = 8 if minimumstay>=8 & !missing(minimumstay1)
replace minimumstay1 = 99 if missing(minimumstay)

gen hasphotos = 0
replace hasphotos = 1 if !missing(numberofphotos)
gen photos1 = numberofphotos
replace photos1 = 999 if missing(numberofphotos)
gen photos1sq = photos1^2

drop superhost1
gen superhost1 = 0
replace superhost1 = 1 if inlist(superhost,"True","t")
replace superhost1 = 99 if missing(superhost)

gen superhost_dum = superhost1
replace superhost_dum = . if missing(superhost)

gen securitydepositsq = securitydeposit^2
gen cleaningfeesq = cleaningfee^2
gen extrapeoplefeesq = extrapeoplefee^2

gen businessready_dum = inlist(businessready , "True" "t" )

** get total number if properties listed by host that month 
gen obs=1
bysort hostid bookym: egen num_host_props = sum(obs)
tab num_host_props

gen singlelisted = (num_host_props==1)
gen multilisted = (num_host_props>1)
gen fivepluslisted = (num_host_props>=5)

xtset propertyid obsym

** Save a property-month panel 

save "propmonth_analysis_full.dta" , replace


**************************************
**Restricted sample*******************
**************************************

use "combined_monthly_analysis.dta", clear

*Keeping jurisdictions for analysis
keep if ///
(tax_city=="Los Angeles" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Austin" & countyname=="Travis" & state=="Texas") | ///
(tax_city=="San Diego" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Houston" & countyname=="Harris" & state=="Texas") | ///
(tax_city=="Washington" & countyname=="District of Columbia" & state=="District of Columbia") | ///
(tax_city=="Seattle" & countyname=="King" & state=="Washington") | ///
(tax_city=="Boston" & countyname=="Suffolk" & state=="Massachusetts") | ///
(tax_city=="New Orleans" & countyname=="Orleans" & state=="Louisiana") | ///
(tax_city=="Four Corners" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Atlanta" & countyname=="Fulton" & state=="Georgia") | ///
(tax_city=="Kissimmee" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Oakland" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Cambridge" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Phoenix" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Orlando" & countyname=="Orange" & state=="Florida") | ///
(tax_city=="Dallas" & countyname=="Dallas" & state=="Texas") | ///
(tax_city=="Jersey City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Scottsdale" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Indianapolis" & countyname=="Marion" & state=="Indiana") | ///
(tax_city=="Salt Lake City" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Minneapolis" & countyname=="Hennepin" & state=="Minnesota") | ///
(tax_city=="Sunny Isles Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Santa Clara" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Arlington" & countyname=="Arlington" & state=="Virginia") | ///
(tax_city=="Long Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Mountain View" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Somerville" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Charlotte" & countyname=="Mecklenburg" & state=="North Carolina") | ///
(tax_city=="Galveston" & countyname=="Galveston" & state=="Texas") | ///
(tax_city=="Anchorage" & countyname=="Anchorage" & state=="Alaska") | ///
(tax_city=="Four Corners" & countyname=="Lake" & state=="Florida") | ///
(tax_city=="Pasadena" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Beverly Hills" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Oceanside" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Malibu" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Brookline" & countyname=="Norfolk" & state=="Massachusetts") | ///
(tax_city=="Huntington Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="Tempe" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Hoboken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="San Mateo" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="West Palm Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Alexandria" & countyname=="Alexandria" & state=="Virginia") | ///
(tax_city=="Atlanta" & countyname=="DeKalb" & state=="Georgia") | ///
(tax_city=="Fort Worth" & countyname=="Tarrant" & state=="Texas") | ///
(tax_city=="Bellevue" & countyname=="King" & state=="Washington") | ///
(tax_city=="Redwood City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Fremont" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Mesa" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Delray Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Glendale" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Menlo Park" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Rowland Heights" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="St. Paul" & countyname=="Ramsey" & state=="Minnesota") | ///
(tax_city=="Culver City" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Weehawken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Evanston" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Milpitas" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Daly City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Lakewood" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Costa Mesa" & countyname=="Orange" & state=="California") | ///
(tax_city=="Sandy" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Newton" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Kirkland" & countyname=="King" & state=="Washington") | ///
(tax_city=="Alameda" & countyname=="Alameda" & state=="California") | ///
(tax_city=="West New York" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Newark" & countyname=="Essex" & state=="New Jersey") | ///
(tax_city=="Silver Spring" & countyname=="Montgomery" & state=="Maryland") | /// 
(tax_city=="Cleveland Heights" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Bethesda" & countyname=="Montgomery" & state=="Maryland") | ///
(tax_city=="University Place" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Richmond" & countyname=="Contra Costa" & state=="California") | ///
(tax_city=="Oak Park" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Lakewood" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Millcreek" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Redmond" & countyname=="King" & state=="Washington") | ///
(tax_city=="Metairie" & countyname=="Jefferson" & state=="Louisiana") | ///
(tax_city=="Golden" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Vashon" & countyname=="King" & state=="Washington") ///

*this reassigns 23 properties (updated 1-19-18)
replace metro="Oakland" if countyname=="San Mateo" & metro=="SanJose"

*this reassigns 16 properties (updated 2-26-18)
replace metro="LosAngeles" if countyname=="Orange" & metro=="SanDiego"

egen taxjuris = group(tax_city countyname state)
egen countyid = group(countyname state)

sort propertyid obsym

keep if bedrooms<=4
keep if maxguests<=12
drop if listingtype=="Shared room"

*generating mean ask price by property id for dropping of outliers
bysort propertyid: egen meanaskprice = mean(askingprice)
gen meanaskprice1 = meanaskprice
replace meanaskprice1 = . if index!=1

*generating percentiles to next drop properties with outlying prices
bysort taxjuris: egen ask_pctile10 = pctile(meanaskprice1), p(10)
bysort taxjuris: egen ask_pctile90 = pctile(meanaskprice1), p(90)

*only keep those with prices between 10 and 90 percentile of their tax jurisdiction
keep if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

drop meanaskprice

sort propertyid obsym

gen askpricewithtax = askingprice*(1+(taxvar/100))
gen bookpricewithtax = bookprice*(1+(taxvar/100))

*logging variables of interest
gen lntaxvar = ln(1 + (taxvar/100))
gen lnbookprice = ln(bookprice)
gen lnaskingprice = ln(askingprice)
gen lnbookpricewithtax = ln(bookpricewithtax)
gen lnaskpricewithtax = ln(askpricewithtax)

*do 1 + supply and 1 + booked to deal with zeros.  could also use poisson.
gen lnsupply = ln(1 + supply)
gen lnbooked = ln(1 + booked)

*entry/exit
bysort propertyid: egen lastmonth = max(obsym)
bysort propertyid: egen firstmonth = min(obsym)
*655 is first month of data
gen entry = 0 if obsym!=655
replace entry = 1 if entry==0 & obsym==firstmonth
*692 is last month of data
gen exit = 0 if obsym!=692
replace exit = 1 if exit==0 & obsym==lastmonth

** include a second definition of exit, last month with positive supply
bysort propertyid: egen lastsupply = max(obsym) if supply>0
bysort propertyid: egen lastsupply2 = min(lastsupply)
gen exit2 = 0
replace exit2 = 1 if obsym==lastsupply2 & lastsupply2!=692
replace exit2=. if obsym>lastsupply2 

gen date = dofm(obsym)
format date %td
gen obsyear = year(date)
drop date

encode metro, gen(metro1)

encode cancellationpolicy, gen(cancel)
replace cancel = 99 if missing(cancellationpolicy)

gen rated = 0
replace rated = 1 if !missing(overallrating)
gen rating1 = overallrating*10
replace rating1 = 99 if missing(overallrating)
gen rating1sq = rating1^2

gen bathrooms1 = bathrooms*2
replace bathrooms1 = 8 if bathrooms1>8 & !missing(bathrooms1)
replace bathrooms1 = 99 if missing(bathrooms)

gen maxguests1 = maxguests
replace maxguests1 = 8 if maxguests1>8 & !missing(maxguests1)

gen minimumstay1 = minimumstay
replace minimumstay1 = 8 if minimumstay>=8 & !missing(minimumstay1)
replace minimumstay1 = 99 if missing(minimumstay)

gen hasphotos = 0
replace hasphotos = 1 if !missing(numberofphotos)
gen photos1 = numberofphotos
replace photos1 = 999 if missing(numberofphotos)
gen photos1sq = photos1^2

drop superhost1
gen superhost1 = 0
replace superhost1 = 1 if inlist(superhost,"True","t")
replace superhost1 = 99 if missing(superhost)

gen superhost_dum = superhost1
replace superhost_dum = . if missing(superhost)

gen securitydepositsq = securitydeposit^2
gen cleaningfeesq = cleaningfee^2
gen extrapeoplefeesq = extrapeoplefee^2

gen businessready_dum = inlist(businessready , "True" "t" )

** get total number if properties listed by host that month 
gen obs=1
bysort hostid bookym: egen num_host_props = sum(obs)
tab num_host_props

gen singlelisted = (num_host_props==1)
gen multilisted = (num_host_props>1)
gen fivepluslisted = (num_host_props>=5)

xtset propertyid obsym

** Save a property-month panel 

save "propmonth_analysis_restricted.dta" , replace

*******************************************************************************
** Further Restricted, dropping uncertain tax obligations pre-enforcement *****
*******************************************************************************
use "combined_monthly_analysis.dta", clear

*Keeping jurisdictions for analysis
keep if ///
(tax_city=="Los Angeles" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Austin" & countyname=="Travis" & state=="Texas") | ///
(tax_city=="San Diego" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Houston" & countyname=="Harris" & state=="Texas") | ///
(tax_city=="Washington" & countyname=="District of Columbia" & state=="District of Columbia") | ///
(tax_city=="Seattle" & countyname=="King" & state=="Washington") | ///
(tax_city=="Boston" & countyname=="Suffolk" & state=="Massachusetts") | ///
(tax_city=="Four Corners" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Atlanta" & countyname=="Fulton" & state=="Georgia") | ///
(tax_city=="Kissimmee" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Oakland" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Cambridge" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Orlando" & countyname=="Orange" & state=="Florida") | ///
(tax_city=="Dallas" & countyname=="Dallas" & state=="Texas") | ///
(tax_city=="Jersey City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Indianapolis" & countyname=="Marion" & state=="Indiana") | ///
(tax_city=="Minneapolis" & countyname=="Hennepin" & state=="Minnesota") | ///
(tax_city=="Sunny Isles Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Santa Clara" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Arlington" & countyname=="Arlington" & state=="Virginia") | ///
(tax_city=="Long Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Mountain View" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Somerville" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Charlotte" & countyname=="Mecklenburg" & state=="North Carolina") | ///
(tax_city=="Galveston" & countyname=="Galveston" & state=="Texas") | ///
(tax_city=="Anchorage" & countyname=="Anchorage" & state=="Alaska") | ///
(tax_city=="Four Corners" & countyname=="Lake" & state=="Florida") | ///
(tax_city=="Pasadena" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Beverly Hills" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Oceanside" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Malibu" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Brookline" & countyname=="Norfolk" & state=="Massachusetts") | ///
(tax_city=="San Mateo" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="West Palm Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Alexandria" & countyname=="Alexandria" & state=="Virginia") | ///
(tax_city=="Atlanta" & countyname=="DeKalb" & state=="Georgia") | ///
(tax_city=="Fort Worth" & countyname=="Tarrant" & state=="Texas") | ///
(tax_city=="Bellevue" & countyname=="King" & state=="Washington") | ///
(tax_city=="Redwood City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Delray Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Glendale" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Menlo Park" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Rowland Heights" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="St. Paul" & countyname=="Ramsey" & state=="Minnesota") | ///
(tax_city=="Evanston" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Milpitas" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Daly City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Newton" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Kirkland" & countyname=="King" & state=="Washington") | ///
(tax_city=="Alameda" & countyname=="Alameda" & state=="California") | ///
(tax_city=="University Place" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Richmond" & countyname=="Contra Costa" & state=="California") | ///
(tax_city=="Oak Park" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Redmond" & countyname=="King" & state=="Washington") | ///
(tax_city=="Vashon" & countyname=="King" & state=="Washington")

*this reassigns 23 properties (updated 1-19-18)
replace metro="Oakland" if countyname=="San Mateo" & metro=="SanJose"

*this reassigns 16 properties (updated 2-26-18)
replace metro="LosAngeles" if countyname=="Orange" & metro=="SanDiego"

*dropping pre-period before cities clarified tax obligation
drop if tax_city=="Jersey City" & bookym<663
drop if tax_city=="San Diego" & bookym<661

egen taxjuris = group(tax_city countyname state)
egen countyid = group(countyname state)

sort propertyid obsym

keep if bedrooms<=4
keep if maxguests<=12
drop if listingtype=="Shared room"

*generating mean ask price by property id for dropping of outliers
bysort propertyid: egen meanaskprice = mean(askingprice)
gen meanaskprice1 = meanaskprice
replace meanaskprice1 = . if index!=1

*generating percentiles to next drop properties with outlying prices
bysort taxjuris: egen ask_pctile10 = pctile(meanaskprice1), p(10)
bysort taxjuris: egen ask_pctile90 = pctile(meanaskprice1), p(90)

*only keep those with prices between 10 and 90 percentile of their tax jurisdiction
keep if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

drop meanaskprice

sort propertyid obsym

gen askpricewithtax = askingprice*(1+(taxvar/100))
gen bookpricewithtax = bookprice*(1+(taxvar/100))

*logging variables of interest
gen lntaxvar = ln(1 + (taxvar/100))
gen lnbookprice = ln(bookprice)
gen lnaskingprice = ln(askingprice)
gen lnbookpricewithtax = ln(bookpricewithtax)
gen lnaskpricewithtax = ln(askpricewithtax)

*do 1 + supply and 1 + booked to deal with zeros.  could also use poisson.
gen lnsupply = ln(1 + supply)
gen lnbooked = ln(1 + booked)

*entry/exit
bysort propertyid: egen lastmonth = max(obsym)
bysort propertyid: egen firstmonth = min(obsym)
*655 is first month of data
gen entry = 0 if obsym!=655
replace entry = 1 if entry==0 & obsym==firstmonth
*692 is last month of data
gen exit = 0 if obsym!=692
replace exit = 1 if exit==0 & obsym==lastmonth

** include a second definition of exit, last month with positive supply
bysort propertyid: egen lastsupply = max(obsym) if supply>0
bysort propertyid: egen lastsupply2 = min(lastsupply)
gen exit2 = 0
replace exit2 = 1 if obsym==lastsupply2 & lastsupply2!=692
replace exit2=. if obsym>lastsupply2 

gen date = dofm(obsym)
format date %td
gen obsyear = year(date)
drop date

encode metro, gen(metro1)

encode cancellationpolicy, gen(cancel)
replace cancel = 99 if missing(cancellationpolicy)

gen rated = 0
replace rated = 1 if !missing(overallrating)
gen rating1 = overallrating*10
replace rating1 = 99 if missing(overallrating)
gen rating1sq = rating1^2

gen bathrooms1 = bathrooms*2
replace bathrooms1 = 8 if bathrooms1>8 & !missing(bathrooms1)
replace bathrooms1 = 99 if missing(bathrooms)

gen maxguests1 = maxguests
replace maxguests1 = 8 if maxguests1>8 & !missing(maxguests1)

gen minimumstay1 = minimumstay
replace minimumstay1 = 8 if minimumstay>=8 & !missing(minimumstay1)
replace minimumstay1 = 99 if missing(minimumstay)

gen hasphotos = 0
replace hasphotos = 1 if !missing(numberofphotos)
gen photos1 = numberofphotos
replace photos1 = 999 if missing(numberofphotos)
gen photos1sq = photos1^2

drop superhost1
gen superhost1 = 0
replace superhost1 = 1 if inlist(superhost,"True","t")
replace superhost1 = 99 if missing(superhost)

gen superhost_dum = superhost1
replace superhost_dum = . if missing(superhost)

gen securitydepositsq = securitydeposit^2
gen cleaningfeesq = cleaningfee^2
gen extrapeoplefeesq = extrapeoplefee^2

gen businessready_dum = inlist(businessready , "True" "t" )

** get total number if properties listed by host that month 
gen obs=1
bysort hostid bookym: egen num_host_props = sum(obs)
tab num_host_props

gen singlelisted = (num_host_props==1)
gen multilisted = (num_host_props>1)
gen fivepluslisted = (num_host_props>=5)

xtset propertyid obsym

** Save a property-month panel 

save "propmonth_analysis_excl_unclear_tax_oblig.dta" , replace



************************************************************************
**Restricted sample - Alternate Property Restrictions*******************
************************************************************************

clear all
set more off

use "combined_monthly_analysis.dta", clear

*Keeping jurisdictions for analysis
keep if ///
(tax_city=="Los Angeles" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Austin" & countyname=="Travis" & state=="Texas") | ///
(tax_city=="San Diego" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Houston" & countyname=="Harris" & state=="Texas") | ///
(tax_city=="Washington" & countyname=="District of Columbia" & state=="District of Columbia") | ///
(tax_city=="Seattle" & countyname=="King" & state=="Washington") | ///
(tax_city=="Boston" & countyname=="Suffolk" & state=="Massachusetts") | ///
(tax_city=="New Orleans" & countyname=="Orleans" & state=="Louisiana") | ///
(tax_city=="Four Corners" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Atlanta" & countyname=="Fulton" & state=="Georgia") | ///
(tax_city=="Kissimmee" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Oakland" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Cambridge" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Phoenix" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Orlando" & countyname=="Orange" & state=="Florida") | ///
(tax_city=="Dallas" & countyname=="Dallas" & state=="Texas") | ///
(tax_city=="Jersey City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Scottsdale" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Indianapolis" & countyname=="Marion" & state=="Indiana") | ///
(tax_city=="Salt Lake City" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Minneapolis" & countyname=="Hennepin" & state=="Minnesota") | ///
(tax_city=="Sunny Isles Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Santa Clara" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Arlington" & countyname=="Arlington" & state=="Virginia") | ///
(tax_city=="Long Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Mountain View" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Somerville" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Charlotte" & countyname=="Mecklenburg" & state=="North Carolina") | ///
(tax_city=="Galveston" & countyname=="Galveston" & state=="Texas") | ///
(tax_city=="Anchorage" & countyname=="Anchorage" & state=="Alaska") | ///
(tax_city=="Four Corners" & countyname=="Lake" & state=="Florida") | ///
(tax_city=="Pasadena" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Beverly Hills" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Oceanside" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Malibu" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Brookline" & countyname=="Norfolk" & state=="Massachusetts") | ///
(tax_city=="Huntington Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="Tempe" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Hoboken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="San Mateo" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="West Palm Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Alexandria" & countyname=="Alexandria" & state=="Virginia") | ///
(tax_city=="Atlanta" & countyname=="DeKalb" & state=="Georgia") | ///
(tax_city=="Fort Worth" & countyname=="Tarrant" & state=="Texas") | ///
(tax_city=="Bellevue" & countyname=="King" & state=="Washington") | ///
(tax_city=="Redwood City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Fremont" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Mesa" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Delray Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Glendale" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Menlo Park" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Rowland Heights" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="St. Paul" & countyname=="Ramsey" & state=="Minnesota") | ///
(tax_city=="Culver City" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Weehawken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Evanston" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Milpitas" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Daly City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Lakewood" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Costa Mesa" & countyname=="Orange" & state=="California") | ///
(tax_city=="Sandy" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Newton" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Kirkland" & countyname=="King" & state=="Washington") | ///
(tax_city=="Alameda" & countyname=="Alameda" & state=="California") | ///
(tax_city=="West New York" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Newark" & countyname=="Essex" & state=="New Jersey") | ///
(tax_city=="Silver Spring" & countyname=="Montgomery" & state=="Maryland") | /// 
(tax_city=="Cleveland Heights" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Bethesda" & countyname=="Montgomery" & state=="Maryland") | ///
(tax_city=="University Place" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Richmond" & countyname=="Contra Costa" & state=="California") | ///
(tax_city=="Oak Park" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Lakewood" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Millcreek" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Redmond" & countyname=="King" & state=="Washington") | ///
(tax_city=="Metairie" & countyname=="Jefferson" & state=="Louisiana") | ///
(tax_city=="Golden" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Vashon" & countyname=="King" & state=="Washington") ///

*this reassigns 23 properties (updated 1-19-18)
replace metro="Oakland" if countyname=="San Mateo" & metro=="SanJose"

*this reassigns 16 properties (updated 2-26-18)
replace metro="LosAngeles" if countyname=="Orange" & metro=="SanDiego"

egen taxjuris = group(tax_city countyname state)
egen countyid = group(countyname state)

sort propertyid obsym

gen bedroomsleq4 = 0
replace bedroomsleq4 = 1 if bedrooms<=4

gen maxguestsleq12 = 0
replace maxguestsleq12 = 1 if maxguests<=12

gen sharedroom = 0
replace sharedroom = 1 if listingtype=="Shared room"

gen privateroom = 0
replace privateroom = 1 if listingtype=="Private room"

gen entirehome = 0
replace entirehome = 1 if listingtype=="Entire home/apt"

*generating mean ask price by property id for dropping of outliers
bysort propertyid: egen meanaskprice = mean(askingprice)
gen meanaskprice1 = meanaskprice
replace meanaskprice1 = . if index!=1

*generating percentiles to next drop properties with outlying prices
bysort taxjuris: egen ask_pctile10 = pctile(meanaskprice1), p(10)
bysort taxjuris: egen ask_pctile90 = pctile(meanaskprice1), p(90)
bysort taxjuris: egen ask_pctile25 = pctile(meanaskprice1), p(25)
bysort taxjuris: egen ask_pctile75 = pctile(meanaskprice1), p(75)
bysort taxjuris: egen ask_pctile5 = pctile(meanaskprice1), p(5)
bysort taxjuris: egen ask_pctile95 = pctile(meanaskprice1), p(95)
bysort taxjuris: egen ask_pctile50 = pctile(meanaskprice1), p(50)

*only keep those with prices between 10 and 90 percentile of their tax jurisdiction
*keep if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

gen askprice1090 = 0
replace askprice1090 = 1 if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

gen askprice595 = 0
replace askprice595 = 1 if meanaskprice<=ask_pctile95 & meanaskprice>=ask_pctile5

gen askprice2575 = 0
replace askprice2575 = 1 if meanaskprice<=ask_pctile75 & meanaskprice>=ask_pctile25

gen askprice_quartile1 = 0
replace askprice_quartile1 = 1 if meanaskprice<=ask_pctile25

gen askprice_quartile2 = 0
replace askprice_quartile2 = 1 if meanaskprice>ask_pctile25 & meanaskprice<=ask_pctile50

gen askprice_quartile3 = 0
replace askprice_quartile3 = 1 if meanaskprice<=ask_pctile75 & meanaskprice>ask_pctile50

gen askprice_quartile4 = 0
replace askprice_quartile4 = 1 if meanaskprice>ask_pctile75


drop meanaskprice

sort propertyid obsym

gen askpricewithtax = askingprice*(1+(taxvar/100))
gen bookpricewithtax = bookprice*(1+(taxvar/100))

*logging variables of interest
gen lntaxvar = ln(1 + (taxvar/100))
gen lnbookprice = ln(bookprice)
gen lnaskingprice = ln(askingprice)
gen lnbookpricewithtax = ln(bookpricewithtax)
gen lnaskpricewithtax = ln(askpricewithtax)

*do 1 + supply and 1 + booked to deal with zeros.  could also use poisson.
gen lnsupply = ln(1 + supply)
gen lnbooked = ln(1 + booked)

*entry/exit
bysort propertyid: egen lastmonth = max(obsym)
bysort propertyid: egen firstmonth = min(obsym)
*655 is first month of data
gen entry = 0 if obsym!=655
replace entry = 1 if entry==0 & obsym==firstmonth
*692 is last month of data
gen exit = 0 if obsym!=692
replace exit = 1 if exit==0 & obsym==lastmonth

** include a second definition of exit, last month with positive supply
bysort propertyid: egen lastsupply = max(obsym) if supply>0
bysort propertyid: egen lastsupply2 = min(lastsupply)
gen exit2 = 0
replace exit2 = 1 if obsym==lastsupply2 & lastsupply2!=692
replace exit2=. if obsym>lastsupply2 

gen date = dofm(obsym)
format date %td
gen obsyear = year(date)
drop date

encode metro, gen(metro1)

encode cancellationpolicy, gen(cancel)
replace cancel = 99 if missing(cancellationpolicy)

gen rated = 0
replace rated = 1 if !missing(overallrating)
gen rating1 = overallrating*10
replace rating1 = 99 if missing(overallrating)
gen rating1sq = rating1^2

gen bathrooms1 = bathrooms*2
replace bathrooms1 = 8 if bathrooms1>8 & !missing(bathrooms1)
replace bathrooms1 = 99 if missing(bathrooms)

gen maxguests1 = maxguests
replace maxguests1 = 8 if maxguests1>8 & !missing(maxguests1)

gen minimumstay1 = minimumstay
replace minimumstay1 = 8 if minimumstay>=8 & !missing(minimumstay1)
replace minimumstay1 = 99 if missing(minimumstay)

gen hasphotos = 0
replace hasphotos = 1 if !missing(numberofphotos)
gen photos1 = numberofphotos
replace photos1 = 999 if missing(numberofphotos)
gen photos1sq = photos1^2

drop superhost1
gen superhost1 = 0
replace superhost1 = 1 if inlist(superhost,"True","t")
replace superhost1 = 99 if missing(superhost)

gen superhost_dum = superhost1
replace superhost_dum = . if missing(superhost)

gen securitydepositsq = securitydeposit^2
gen cleaningfeesq = cleaningfee^2
gen extrapeoplefeesq = extrapeoplefee^2

gen businessready_dum = inlist(businessready , "True" "t" )

*generate distance from city center
bysort tax_city state: gen cityindex = _n
opencagegeo if cityindex==1, city(tax_city) state(state) key(2390681d2fbf1301f4bc047b9f00e8f4) replace
gen g_lat1 = real(g_lat)
gen g_lon1 = real(g_lon)
bysort taxjuris: egen center_lat = min(g_lat1)
bysort taxjuris: egen center_lon = min(g_lon1)

vincenty latitude longitude center_lat center_lon, vin(distfromcenter)


** get total number if properties listed by host that month 
gen obs=1
bysort hostid bookym: egen num_host_props = sum(obs)
tab num_host_props

gen singlelisted = (num_host_props==1)
gen multilisted = (num_host_props>1)
gen fivepluslisted = (num_host_props>=5)

xtset propertyid obsym

** Save a property-month panel 

save "propmonth_analysis_restricted - alternate property restrictions.dta" , replace


******************************************************************************************
**Restricted sample - Alternate Property Restrictions - Dropping Shared Room Listings*****
******************************************************************************************

clear all
set more off

** setting file paths

use "combined_monthly_analysis.dta", clear

*Keeping jurisdictions for analysis
keep if ///
(tax_city=="Los Angeles" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Austin" & countyname=="Travis" & state=="Texas") | ///
(tax_city=="San Diego" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Houston" & countyname=="Harris" & state=="Texas") | ///
(tax_city=="Washington" & countyname=="District of Columbia" & state=="District of Columbia") | ///
(tax_city=="Seattle" & countyname=="King" & state=="Washington") | ///
(tax_city=="Boston" & countyname=="Suffolk" & state=="Massachusetts") | ///
(tax_city=="New Orleans" & countyname=="Orleans" & state=="Louisiana") | ///
(tax_city=="Four Corners" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Atlanta" & countyname=="Fulton" & state=="Georgia") | ///
(tax_city=="Kissimmee" & countyname=="Osceola" & state=="Florida") | ///
(tax_city=="Oakland" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Cambridge" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Phoenix" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Orlando" & countyname=="Orange" & state=="Florida") | ///
(tax_city=="Dallas" & countyname=="Dallas" & state=="Texas") | ///
(tax_city=="Jersey City" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Scottsdale" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Indianapolis" & countyname=="Marion" & state=="Indiana") | ///
(tax_city=="Salt Lake City" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Minneapolis" & countyname=="Hennepin" & state=="Minnesota") | ///
(tax_city=="Sunny Isles Beach" & countyname=="Miami-Dade" & state=="Florida") | ///
(tax_city=="Santa Clara" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Arlington" & countyname=="Arlington" & state=="Virginia") | ///
(tax_city=="Long Beach" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Mountain View" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Somerville" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Charlotte" & countyname=="Mecklenburg" & state=="North Carolina") | ///
(tax_city=="Galveston" & countyname=="Galveston" & state=="Texas") | ///
(tax_city=="Anchorage" & countyname=="Anchorage" & state=="Alaska") | ///
(tax_city=="Four Corners" & countyname=="Lake" & state=="Florida") | ///
(tax_city=="Pasadena" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Beverly Hills" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Oceanside" & countyname=="San Diego" & state=="California") | ///
(tax_city=="Malibu" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Brookline" & countyname=="Norfolk" & state=="Massachusetts") | ///
(tax_city=="Huntington Beach" & countyname=="Orange" & state=="California") | ///
(tax_city=="Tempe" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Hoboken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="San Mateo" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="West Palm Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Alexandria" & countyname=="Alexandria" & state=="Virginia") | ///
(tax_city=="Atlanta" & countyname=="DeKalb" & state=="Georgia") | ///
(tax_city=="Fort Worth" & countyname=="Tarrant" & state=="Texas") | ///
(tax_city=="Bellevue" & countyname=="King" & state=="Washington") | ///
(tax_city=="Redwood City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Fremont" & countyname=="Alameda" & state=="California") | ///
(tax_city=="Mesa" & countyname=="Maricopa" & state=="Arizona") | ///
(tax_city=="Delray Beach" & countyname=="Palm Beach" & state=="Florida") | ///
(tax_city=="Glendale" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Menlo Park" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Rowland Heights" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="St. Paul" & countyname=="Ramsey" & state=="Minnesota") | ///
(tax_city=="Culver City" & countyname=="Los Angeles" & state=="California") | ///
(tax_city=="Weehawken" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Evanston" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Milpitas" & countyname=="Santa Clara" & state=="California") | ///
(tax_city=="Daly City" & countyname=="San Mateo" & state=="California") | ///
(tax_city=="Lakewood" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Costa Mesa" & countyname=="Orange" & state=="California") | ///
(tax_city=="Sandy" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Newton" & countyname=="Middlesex" & state=="Massachusetts") | ///
(tax_city=="Kirkland" & countyname=="King" & state=="Washington") | ///
(tax_city=="Alameda" & countyname=="Alameda" & state=="California") | ///
(tax_city=="West New York" & countyname=="Hudson" & state=="New Jersey") | ///
(tax_city=="Newark" & countyname=="Essex" & state=="New Jersey") | ///
(tax_city=="Silver Spring" & countyname=="Montgomery" & state=="Maryland") | /// 
(tax_city=="Cleveland Heights" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Bethesda" & countyname=="Montgomery" & state=="Maryland") | ///
(tax_city=="University Place" & countyname=="Pierce" & state=="Washington") | ///
(tax_city=="Richmond" & countyname=="Contra Costa" & state=="California") | ///
(tax_city=="Oak Park" & countyname=="Cook" & state=="Illinois") | ///
(tax_city=="Lakewood" & countyname=="Cuyahoga" & state=="Ohio") | ///
(tax_city=="Millcreek" & countyname=="Salt Lake" & state=="Utah") | ///
(tax_city=="Redmond" & countyname=="King" & state=="Washington") | ///
(tax_city=="Metairie" & countyname=="Jefferson" & state=="Louisiana") | ///
(tax_city=="Golden" & countyname=="Jefferson" & state=="Colorado") | ///
(tax_city=="Vashon" & countyname=="King" & state=="Washington") ///

*this reassigns 23 properties (updated 1-19-18)
replace metro="Oakland" if countyname=="San Mateo" & metro=="SanJose"

*this reassigns 16 properties (updated 2-26-18)
replace metro="LosAngeles" if countyname=="Orange" & metro=="SanDiego"

egen taxjuris = group(tax_city countyname state)
egen countyid = group(countyname state)

sort propertyid obsym

gen bedroomsleq4 = 0
replace bedroomsleq4 = 1 if bedrooms<=4

gen maxguestsleq12 = 0
replace maxguestsleq12 = 1 if maxguests<=12

*!!!Key difference from above!!! - matters for price percentiles
drop if listingtype=="Shared room"

gen privateroom = 0
replace privateroom = 1 if listingtype=="Private room"

gen entirehome = 0
replace entirehome = 1 if listingtype=="Entire home/apt"

*generating mean ask price by property id for dropping of outliers
bysort propertyid: egen meanaskprice = mean(askingprice)
gen meanaskprice1 = meanaskprice
replace meanaskprice1 = . if index!=1

*generating percentiles to next drop properties with outlying prices
bysort taxjuris: egen ask_pctile10 = pctile(meanaskprice1), p(10)
bysort taxjuris: egen ask_pctile90 = pctile(meanaskprice1), p(90)
bysort taxjuris: egen ask_pctile25 = pctile(meanaskprice1), p(25)
bysort taxjuris: egen ask_pctile75 = pctile(meanaskprice1), p(75)
bysort taxjuris: egen ask_pctile5 = pctile(meanaskprice1), p(5)
bysort taxjuris: egen ask_pctile95 = pctile(meanaskprice1), p(95)
bysort taxjuris: egen ask_pctile50 = pctile(meanaskprice1), p(50)

*only keep those with prices between 10 and 90 percentile of their tax jurisdiction
*keep if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

gen askprice1090 = 0
replace askprice1090 = 1 if meanaskprice<=ask_pctile90 & meanaskprice>=ask_pctile10

gen askprice595 = 0
replace askprice595 = 1 if meanaskprice<=ask_pctile95 & meanaskprice>=ask_pctile5

gen askprice2575 = 0
replace askprice2575 = 1 if meanaskprice<=ask_pctile75 & meanaskprice>=ask_pctile25

gen askprice_quartile1 = 0
replace askprice_quartile1 = 1 if meanaskprice<=ask_pctile25

gen askprice_quartile2 = 0
replace askprice_quartile2 = 1 if meanaskprice>ask_pctile25 & meanaskprice<=ask_pctile50

gen askprice_quartile3 = 0
replace askprice_quartile3 = 1 if meanaskprice<=ask_pctile75 & meanaskprice>ask_pctile50

gen askprice_quartile4 = 0
replace askprice_quartile4 = 1 if meanaskprice>ask_pctile75


drop meanaskprice

sort propertyid obsym

gen askpricewithtax = askingprice*(1+(taxvar/100))
gen bookpricewithtax = bookprice*(1+(taxvar/100))

*logging variables of interest
gen lntaxvar = ln(1 + (taxvar/100))
gen lnbookprice = ln(bookprice)
gen lnaskingprice = ln(askingprice)
gen lnbookpricewithtax = ln(bookpricewithtax)
gen lnaskpricewithtax = ln(askpricewithtax)

*do 1 + supply and 1 + booked to deal with zeros.  could also use poisson.
gen lnsupply = ln(1 + supply)
gen lnbooked = ln(1 + booked)

*entry/exit
bysort propertyid: egen lastmonth = max(obsym)
bysort propertyid: egen firstmonth = min(obsym)
*655 is first month of data
gen entry = 0 if obsym!=655
replace entry = 1 if entry==0 & obsym==firstmonth
*692 is last month of data
gen exit = 0 if obsym!=692
replace exit = 1 if exit==0 & obsym==lastmonth

** include a second definition of exit, last month with positive supply
bysort propertyid: egen lastsupply = max(obsym) if supply>0
bysort propertyid: egen lastsupply2 = min(lastsupply)
gen exit2 = 0
replace exit2 = 1 if obsym==lastsupply2 & lastsupply2!=692
replace exit2=. if obsym>lastsupply2 

gen date = dofm(obsym)
format date %td
gen obsyear = year(date)
drop date

encode metro, gen(metro1)

encode cancellationpolicy, gen(cancel)
replace cancel = 99 if missing(cancellationpolicy)

gen rated = 0
replace rated = 1 if !missing(overallrating)
gen rating1 = overallrating*10
replace rating1 = 99 if missing(overallrating)
gen rating1sq = rating1^2

gen bathrooms1 = bathrooms*2
replace bathrooms1 = 8 if bathrooms1>8 & !missing(bathrooms1)
replace bathrooms1 = 99 if missing(bathrooms)

gen maxguests1 = maxguests
replace maxguests1 = 8 if maxguests1>8 & !missing(maxguests1)

gen minimumstay1 = minimumstay
replace minimumstay1 = 8 if minimumstay>=8 & !missing(minimumstay1)
replace minimumstay1 = 99 if missing(minimumstay)

gen hasphotos = 0
replace hasphotos = 1 if !missing(numberofphotos)
gen photos1 = numberofphotos
replace photos1 = 999 if missing(numberofphotos)
gen photos1sq = photos1^2

drop superhost1
gen superhost1 = 0
replace superhost1 = 1 if inlist(superhost,"True","t")
replace superhost1 = 99 if missing(superhost)

gen superhost_dum = superhost1
replace superhost_dum = . if missing(superhost)

gen securitydepositsq = securitydeposit^2
gen cleaningfeesq = cleaningfee^2
gen extrapeoplefeesq = extrapeoplefee^2

gen businessready_dum = inlist(businessready , "True" "t" )

*generate distance from city center
bysort tax_city state: gen cityindex = _n
opencagegeo if cityindex==1, city(tax_city) state(state) key(2390681d2fbf1301f4bc047b9f00e8f4) replace
gen g_lat1 = real(g_lat)
gen g_lon1 = real(g_lon)
bysort taxjuris: egen center_lat = min(g_lat1)
bysort taxjuris: egen center_lon = min(g_lon1)

vincenty latitude longitude center_lat center_lon, vin(distfromcenter)


** get total number if properties listed by host that month 
gen obs=1
bysort hostid bookym: egen num_host_props = sum(obs)
tab num_host_props

gen singlelisted = (num_host_props==1)
gen multilisted = (num_host_props>1)
gen fivepluslisted = (num_host_props>=5)

xtset propertyid obsym

** Save a property-month panel 

save "propmonth_analysis_restricted - alternate property restrictions - no shared room.dta" , replace



















